import requests
from lxml import etree
import pymysql

class ZufangSpider:
    """Scrape rental listings from zu.fang.com and store them in MySQL.

    Typical use is ``ZufangSpider().run("xian")``: each result page of the
    given city is downloaded, parsed into a list of dicts with the keys
    ``name``, ``size``, ``price`` and ``address``, and inserted into the
    ``houses`` table of the local ``zufang`` database.
    """

    # Seconds to wait on the site before a request is abandoned; without a
    # timeout a stalled connection would block the crawl forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Set up the browser-like request headers and the default start URL."""
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
        }
        self.start_url = "https://xian.zu.fang.com/"

    @staticmethod
    def _page_url(province, page):
        """Return the listing URL for result page *page* of city *province*.

        The city code is trimmed and lower-cased first.  ``"bj"`` is special:
        its listings live on the bare ``zu.fang.com`` host instead of a
        ``<city>.zu.fang.com`` sub-domain.
        """
        city = province.strip().lower()
        if city == "bj":
            return "https://zu.fang.com/house/i3{}/".format(page)
        return "https://{}.zu.fang.com/house/i3{}/".format(city, page)

    @staticmethod
    def _extract_size(info):
        """Return the listing size taken from the third text node of *info*.

        The last two characters of that node are dropped, the rest is
        stripped and ``"㎡"`` is appended.  An empty string is returned when
        fewer than three text nodes are present, instead of raising
        ``IndexError``.
        """
        if len(info) < 3:
            return ""
        return info[2][:-2].strip() + "㎡"

    def parse(self, url):
        """Download *url* and return the listings found on that page.

        Args:
            url: Address of one result page.

        Returns:
            A list of dicts with the string values ``name``, ``size``,
            ``price`` and ``address``.  Entries without an address block are
            skipped; a missing name or price becomes ``""``.

        Raises:
            requests.RequestException: if the page cannot be fetched within
                ``REQUEST_TIMEOUT`` seconds or the connection fails.
        """
        # The headers must be passed by keyword: the second positional
        # argument of requests.get() is ``params`` (the query string).
        resp = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        html = etree.HTML(resp.text)
        if html is None:
            # Guard against the parser handing back no tree at all
            # (e.g. for a body without any parseable markup).
            return []

        houses = []
        for each_house in html.xpath("//div[@class='houseList']/dl"):
            address_parts = each_house.xpath(".//dd[contains(@class,'info')]/p[3]//text()")
            if not address_parts:
                # Only entries that carry an address block are kept.
                continue

            name = each_house.xpath(".//p[@class='title']/a/text()")
            info = each_house.xpath(".//p[contains(@class,'font15 mt12')]/text()")
            price = each_house.xpath(".//span[@class='price']/text()")

            houses.append({
                # Surrounding whitespace of the title text is trimmed.
                'name': name[0].strip() if name else "",
                'size': self._extract_size(info),
                'price': price[0] if price else "",
                'address': "".join(address_parts).replace("-", ""),
            })
        return houses

    def _save_data(self, houses):
        """Insert *houses* into the ``houses`` table in a single transaction.

        Args:
            houses: Iterable of dicts as produced by :meth:`parse`.

        Database errors are printed and the transaction is rolled back; the
        connection is closed in every case.  Nothing is done (and no
        connection is opened) when *houses* is empty.
        """
        rows = [
            (house.get('name'), house.get('price'), house.get('address'), house.get('size'))
            for house in houses
        ]
        if not rows:
            return

        # NOTE(review): connection settings are hard-coded; consider moving
        # them to configuration.
        conn = pymysql.connect(
            host='localhost',
            user='root',
            password='123456',
            db='zufang',
            charset='utf8'
        )
        try:
            with conn.cursor() as cur:
                # Placeholders let the driver escape the scraped text, so a
                # quote inside a listing can neither break nor alter the
                # statement.  The first column is sent as NULL (presumably
                # an auto-generated id -- confirm against the schema).
                cur.executemany(
                    "insert into houses values (null,%s,%s,%s,%s)",
                    rows,
                )
            conn.commit()
        except Exception as e:
            # Best effort: report the problem and discard the partial batch.
            print(e)
            conn.rollback()
        finally:
            conn.close()

    def run(self, province, max_pages=9):
        """Crawl result pages ``1..max_pages`` of *province* and store them.

        Args:
            province: City abbreviation used in the site's host name,
                e.g. ``"xian"``; ``"bj"`` selects the bare host.
            max_pages: Number of result pages to fetch (default 9).
        """
        for page in range(1, max_pages + 1):
            houses = self.parse(self._page_url(province, page))
            self._save_data(houses=houses)


if __name__ == '__main__':
    # Script entry point: build the spider, ask the user for a city
    # abbreviation (the prompt gives "bj" for Beijing as the example)
    # and crawl that city's listings.
    spider = ZufangSpider()
    city_code = input("请输入要查找的城市名称简写：北京--> bj")
    spider.run(city_code)

